Data Downloaded from U.S. Environmental Protection Agency (EPA). “Pollutant Emissions Trends Data.” National Air Quality website, February 2024. Available at EPA Pollutant Emissions Trends (Link)[https://www.epa.gov/air-emissions-inventories/air-pollutant-emissions-trends-data]
Datasets: National Tier 1 CAPS Trends riteria pollutants National Tier 1 for 1970 - 2023. National and State CAPS Trends by Tier 1 and EIS Sector National and State CAPS Trends by Tier 1 and EIS Sector for 2002 - 2023.
# Load national data (assuming it's available in the dataset)
national_air_pollution_trends_data <- read.csv(here::here("Data", "EPA_NEI_National_State_Trends", "National_State_Tier1_Sector_2002_2023_Caps - National.csv"))
# Clean the national dataset
national_air_pollution_cleaned <- national_air_pollution_trends_data %>%
gather(key = "year", value = "emissions", starts_with("emissions")) %>%
mutate(year = as.numeric(gsub("emissions", "", year))) %>%
filter(!is.na(emissions)) %>%
filter(Pollutant %in% c("NH3", "NOX", "SO2", "PMI", "PM10-PRI", "PM25-PRI"))
# Calculate mean emissions by year and pollutant at the national level
national_means <- national_air_pollution_cleaned %>%
group_by(year, Pollutant) %>%
dplyr::summarize(mean_emissions = mean(emissions, na.rm = TRUE))## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.
# Plot national mean emissions over time
ggplot(national_means, aes(x = year, y = mean_emissions, color = Pollutant)) +
geom_line(size = 1.2) +
labs(title = "National Air Pollution Trends for Selected Pollutants (2002-2023)",
x = "Year",
y = "Mean Emissions (tons)",
color = "Pollutant") +
theme_minimal()## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Calculate mean emissions by year and pollutant at the state level
# Load the dataset skipping initial rows and renaming columns
nox_data <- read.csv(here::here("Data", "EPA_NEI_National_State_Trends",
"National_Tier1_CAPS_Trends - NOX.csv"),
skip = 5)
so2_data <- read.csv(here::here("Data", "EPA_NEI_National_State_Trends",
"National_Tier1_CAPS_Trends - SO2.csv"),
skip = 5)
clean_pollutant_data <- function(data, pollutant_name) {
data %>%
pivot_longer(
cols = starts_with("X"),
names_to = "Year",
values_to = "Emissions"
) %>%
mutate(
Year = as.numeric(gsub("X", "", Year)),
Emissions = as.numeric(gsub("[^0-9\\.]", "", Emissions)), # Remove non-numeric characters
Pollutant = pollutant_name
) %>%
filter(!is.na(Emissions)) %>%
mutate(Source.Category = toupper(Source.Category))
}
# Apply the updated cleaning function
nox_cleaned <- clean_pollutant_data(nox_data, "NOX")
so2_cleaned <- clean_pollutant_data(so2_data, "SO2")
ggplot(nox_cleaned, aes(x = Year, y = Emissions, color = Pollutant)) +
geom_line() +
facet_wrap(~Source.Category, scales = "free_y", ncol=4) +
scale_color_manual(values = c("NOX" = "royalblue", "SO2" = "orangered")) +
labs(title = "Emissions Over Time by Pollutant (NOX)",
x = "Year",
y = "Emissions",
color = "Pollutant") +
theme_minimal() +
theme(legend.position = "none",
text = element_text(size = 7),
axis.text.x = element_text(angle = 45, hjust = 1),
strip.text = element_text(size = 5),
title = element_text(size = 10))ggplot(so2_cleaned, aes(x = Year, y = Emissions, color = Pollutant)) +
geom_line() +
facet_wrap(~Source.Category, scales = "free_y", ncol=4) +
scale_color_manual(values = c("NOX" = "royalblue", "SO2" = "orangered")) +
labs(title = "Emissions Over Time by Pollutant (SO2)",
x = "Year",
y = "Emissions",
color = "Pollutant") +
theme_minimal() +
theme(legend.position = "none",
text = element_text(size = 7),
axis.text.x = element_text(angle = 45, hjust = 1),
strip.text = element_text(size = 5),
title = element_text(size = 10))
# NEI Emissions Data
# Define the file path
file_path <- here::here("Data", "EPA_NEI_National_State_Trends", "NEI_2020_Acidfying_Gases_States_of_Interest.xlsx")
# Load the data from the first sheet
acid_gases_data <- read_excel(file_path)
# If you need to load a specific sheet, specify it like this:
# acid_gases_data <- read_excel(file_path, sheet = "Sheet1")
# Extract Latitude and Longitude from the Lat/Lon column
acid_gases_data <- acid_gases_data %>%
mutate(
Longitude = as.numeric(gsub("\\[|\\]|,.*", "", `Lat/Lon`)),
Latitude = as.numeric(gsub("\\[|\\]|.*,", "", `Lat/Lon`)),
.keep = "unused"
)
# Convert to an sf object
acid_gases_sf <- acid_gases_data %>%
st_as_sf(coords = c("Longitude", "Latitude"), crs = 4326, remove = FALSE)
# Load state boundaries
states <- states(cb = TRUE) %>% suppressMessages()
# Plot the emissions data with state boundaries, faceted by Pollutant
ggplot() +
geom_sf(data = states, fill = NA, color = "black") + # Add state boundaries
geom_sf(data = acid_gases_sf, aes(size = `Emissions (Tons)`, color = Pollutant),
alpha = 0.7, size=0.2)+
theme_minimal() +
coord_sf(xlim = c(-92.5, -72.5), ylim = c(34, 44)) + # Adjust the coordinate limits if needed
labs(title = "Acidifying Gas Emissions by Facility",
subtitle = "Locations of Point Source Facilities",
color = "Pollutant", size = "Emissions (Tons)") +
theme(
legend.position = "bottom", # Move legend to the bottom
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5),
legend.box = "horizontal" # Arrange the legends horizontally
) +
facet_wrap(~ Pollutant) # Keep faceting by Pollutant# Filter the data for SO2 emissions
SO2_data <- acid_gases_data %>%
filter(Pollutant == "Sulfur Dioxide")
# Convert to an sf object
SO2_sf <- SO2_data %>%
st_as_sf(coords = c("Longitude", "Latitude"), crs = 4326, remove = FALSE)
# Plot the SO2 emissions data with state boundaries
ggplot() +
geom_sf(data = states, fill = NA, color = "black") + # Add state boundaries
geom_sf(data = SO2_sf, aes(size = `Emissions (Tons)`), color = "royalblue", alpha = 0.7) +
theme_minimal() +
coord_sf(xlim = c(-92.5, -72.5), ylim = c(34, 44)) + # Adjust the coordinate limits if needed
scale_size_continuous(range = c(1, 5), limits = c(0, 30000)) + # Adjust the point size based on emission tons
labs(title = "Sulfur Dioxide (SO₂) Emissions by Facility",
subtitle = "Distribution and Intensity of Emissions",
size = "Emissions (Tons)") +
theme(
legend.position = "bottom", # Move legend to the bottom
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5),
legend.box = "horizontal" # Arrange the legends horizontally
)# Filter the data for NOₓ emissions
NOx_data <- acid_gases_data %>%
filter(Pollutant == "Nitrous Oxide")
# Convert to an sf object
NOx_sf <- NOx_data %>%
st_as_sf(coords = c("Longitude", "Latitude"), crs = 4326, remove = FALSE)
# Plot the NOₓ emissions data with state boundaries
ggplot() +
geom_sf(data = states, fill = NA, color = "black") + # Add state boundaries
geom_sf(data = NOx_sf, aes(size = `Emissions (Tons)`), color = "lightgreen", alpha = 0.7) +
theme_minimal() +
coord_sf(xlim = c(-92.5, -72.5), ylim = c(34, 44)) + # Adjust the coordinate limits if needed
scale_size_continuous(range = c(1, 5), limits = c(0, 1000)) + # Adjust the point size based on emission tons
labs(title = "Nitrous Oxide (NOx) Emissions by Facility",
subtitle = "Distribution and Intensity of Emissions",
size = "Emissions (Tons)") +
theme(
legend.position = "bottom", # Move legend to the bottom
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5),
legend.box = "horizontal" # Arrange the legends horizontally
)# Filter the data for Ammonia emissions
Ammonia_data <- acid_gases_data %>%
filter(Pollutant == "Ammonia")
# Convert to an sf object
Ammonia_sf <- Ammonia_data %>%
st_as_sf(coords = c("Longitude", "Latitude"), crs = 4326, remove = FALSE)
# Plot the Ammonia emissions data with state boundaries
ggplot() +
geom_sf(data = states, fill = NA, color = "black") + # Add state boundaries
geom_sf(data = Ammonia_sf, aes(size = `Emissions (Tons)`), color = "orangered", alpha = 0.7) +
theme_minimal() +
coord_sf(xlim = c(-92.5, -72.5), ylim = c(34, 44)) + # Adjust the coordinate limits if needed
scale_size_continuous(range = c(1, 5), limits = c(0, 1200)) + # Adjust the point size based on emission tons
labs(title = "Ammonia (NH₄) Emissions by Facility",
subtitle = "Distribution and Intensity of Emissions",
size = "Emissions (Tons)") +
theme(
legend.position = "bottom", # Move legend to the bottom
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5),
legend.box = "horizontal" # Arrange the legends horizontally
)
# Emissions Rankings by State, County, & Industry
# Group by State and Pollutant, then calculate total emissions and count the number of facilities
state_facilities_ranking <- acid_gases_data %>%
group_by(State, Pollutant) %>%
summarize(
Facility_Count = n(), # Count the number of facilities
Total_Emissions = sum(`Emissions (Tons)`, na.rm = TRUE) # Sum the emissions
) %>%
arrange(desc(Total_Emissions))## `summarise()` has grouped output by 'State'. You can override using the
## `.groups` argument.
## # A tibble: 21 × 4
## # Groups: State [7]
## State Pollutant Facility_Count Total_Emissions
## <chr> <chr> <int> <dbl>
## 1 Ohio Sulfur Dioxide 932 105750.
## 2 Indiana Sulfur Dioxide 381 70739.
## 3 Illinois Sulfur Dioxide 2205 61748.
## 4 Pennsylvania Sulfur Dioxide 1239 48814.
## 5 Kentucky Sulfur Dioxide 1034 48083.
## 6 West Virginia Sulfur Dioxide 206 37510.
## 7 Tennessee Sulfur Dioxide 453 16735.
## 8 Ohio Ammonia 393 3630.
## 9 Ohio Nitrous Oxide 205 2102.
## 10 Pennsylvania Ammonia 644 1770.
## # ℹ 11 more rows
# Group by State-County and Pollutant, then calculate total emissions and count the number of facilities
county_facilities_ranking <- acid_gases_data %>%
group_by(`State-County`, Pollutant) %>%
summarize(
Facility_Count = n(), # Count the number of facilities
Total_Emissions = sum(`Emissions (Tons)`, na.rm = TRUE) # Sum the emissions
) %>%
arrange(desc(Total_Emissions))## `summarise()` has grouped output by 'State-County'. You can override using the
## `.groups` argument.
## # A tibble: 1,407 × 4
## # Groups: State-County [573]
## `State-County` Pollutant Facility_Count Total_Emissions
## <chr> <chr> <int> <dbl>
## 1 OH - Gallia Sulfur Dioxide 4 31481.
## 2 OH - Hamilton Sulfur Dioxide 47 16875.
## 3 IN - Gibson Sulfur Dioxide 3 13394.
## 4 PA - Armstrong Sulfur Dioxide 12 13015.
## 5 PA - Indiana Sulfur Dioxide 13 12659.
## 6 IN - Lake Sulfur Dioxide 44 11789.
## 7 OH - Jefferson Sulfur Dioxide 9 11651.
## 8 IN - Porter Sulfur Dioxide 15 10736.
## 9 IL - Macon Sulfur Dioxide 32 9846.
## 10 IL - Washington Sulfur Dioxide 5 9779.
## # ℹ 1,397 more rows
# Group by NAICS and Pollutant, then calculate total emissions and count the number of facilities
industry_facilities_ranking <- acid_gases_data %>%
group_by(NAICS, Pollutant) %>%
summarize(
Facility_Count = n(), # Count the number of facilities
Total_Emissions = sum(`Emissions (Tons)`, na.rm = TRUE) # Sum the emissions
) %>%
arrange(desc(Total_Emissions))## `summarise()` has grouped output by 'NAICS'. You can override using the
## `.groups` argument.
## # A tibble: 1,321 × 4
## # Groups: NAICS [511]
## NAICS Pollutant Facility_Count Total_Emissions
## <chr> <chr> <int> <dbl>
## 1 Fossil Fuel Electric Power Generati… Sulfur D… 314 259057.
## 2 All Other Petroleum and Coal Produc… Sulfur D… 16 23778.
## 3 Iron and Steel Mills and Ferroalloy… Sulfur D… 72 12295.
## 4 Wet Corn Milling and Starch Manufac… Sulfur D… 13 11806.
## 5 Solid Waste Landfill Sulfur D… 192 10961.
## 6 Cement Manufacturing Sulfur D… 20 9371.
## 7 Electric Bulk Power Transmission an… Sulfur D… 6 8611.
## 8 Lime Manufacturing Sulfur D… 10 8028.
## 9 Alumina Refining and Primary Alumin… Sulfur D… 3 7769.
## 10 Fossil Fuel Electric Power Generati… Nitrous … 248 4583.
## # ℹ 1,311 more rows
# Download county boundaries for the entire U.S.
counties <- counties(cb = TRUE, resolution = "5m", year=2020) %>%
st_as_sf() # Convert to an sf object
# Merge the county facilities ranking with the spatial county boundaries using the FIPS code
county_map_data <- counties %>%
left_join(acid_gases_data %>%
group_by(FIPS, Pollutant) %>%
summarize(
Facility_Count = n(),
Total_Emissions = sum(`Emissions (Tons)`, na.rm = TRUE)
), by = c("GEOID" = "FIPS"))## `summarise()` has grouped output by 'FIPS'. You can override using the
## `.groups` argument.
## Simple feature collection with 6 features and 15 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -115 ymin: 30.7 xmax: -85.5 ymax: 49
## Geodetic CRS: NAD83
## STATEFP COUNTYFP COUNTYNS AFFGEOID GEOID NAME NAMELSAD
## 1 04 023 00040472 0500000US04023 04023 Santa Cruz Santa Cruz County
## 2 12 059 00295735 0500000US12059 12059 Holmes Holmes County
## 3 27 081 00659486 0500000US27081 27081 Lincoln Lincoln County
## 4 30 029 01719611 0500000US30029 30029 Flathead Flathead County
## 5 31 135 00835889 0500000US31135 31135 Perkins Perkins County
## 6 38 075 01034229 0500000US38075 38075 Renville Renville County
## STUSPS STATE_NAME LSAD ALAND AWATER Pollutant Facility_Count
## 1 AZ Arizona 06 3201853240 3068237 <NA> NA
## 2 FL Florida 06 1240232910 26393540 <NA> NA
## 3 MN Minnesota 06 1390265112 30167787 <NA> NA
## 4 MT Montana 06 13175844498 437162491 <NA> NA
## 5 NE Nebraska 06 2287768800 2840176 <NA> NA
## 6 ND North Dakota 06 2272050275 40658499 <NA> NA
## Total_Emissions geometry
## 1 NA MULTIPOLYGON (((-111.4 31.5...
## 2 NA MULTIPOLYGON (((-86.04 30.9...
## 3 NA MULTIPOLYGON (((-96.45 44.2...
## 4 NA MULTIPOLYGON (((-115 48.23,...
## 5 NA MULTIPOLYGON (((-102.1 41, ...
## 6 NA MULTIPOLYGON (((-102 48.81,...
# Ensure the 'NA' values in the Pollutant column are excluded
county_map_data <- county_map_data %>%
dplyr::filter(!is.na(Pollutant))
# Function to plot each pollutant separately with a more visible gradient
plot_pollutant_separately <- function(data, variable, title, subtitle) {
unique_pollutants <- unique(data$Pollutant)
plots <- lapply(unique_pollutants, function(pollutant) {
p_data <- data %>% dplyr::filter(Pollutant == pollutant)
# Calculate the range of the data for the current pollutant
value_range <- range(p_data[[variable]], na.rm = TRUE)
ggplot(data = p_data) +
geom_sf(aes_string(fill = variable)) +
scale_fill_viridis_c(
option = "C",
limits = value_range, # Use the range of the current data
trans = "log", # Apply a logarithmic transformation for better visibility
breaks = scales::pretty_breaks(n = 5) # Adjust the number of breaks in the gradient
) +
coord_sf(xlim = c(-92.5, -72.5), ylim = c(34, 44)) + # Adjust the coordinate limits if needed
labs(title = paste(title, "-", pollutant),
subtitle = subtitle,
fill = variable) +
theme_minimal()
})
return(plots)
}
# Plot the Facility Count by County separately for each pollutant
facility_count_plots <- plot_pollutant_separately(
county_map_data,
"Facility_Count",
"Facility Count by County",
"Total number of facilities per county"
)## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Plot the Total Emissions by County separately for each pollutant
total_emissions_plots <- plot_pollutant_separately(
county_map_data,
"Total_Emissions",
"Total Emissions by County",
"Total emissions in tons per county"
)
# Display the plots
facility_count_plots[[1]]